#################################################
########## Descriptive Statistics ###############

# Data In: "posts.csv"
         # "comments.csv"
         # "metadata.csv" 
# Data Out: 
# Figure 1a
# Figure 1b
# Table 1
# Table A2

#################################################

# Load Packages
library(readr)
library(tidyverse)
library(lubridate)
library(xtable)

# Heavily penalise scientific notation so that numbers (including those
# shown on the plots below) are printed in fixed notation
options(scipen = 999999)

# Set working directory to replication folder (Refugee_Info_Replication)

# Open code file from "Refugee_Info_Replication/code/descriptive_statistics.R"

# Locate this script through the RStudio API and make the replication folder
# (the parent of the directory holding the script) the working directory.
#
# getActiveDocumentContext()$path is an empty string -- not NULL -- when the
# document has never been saved, and the rstudioapi call itself errors when
# the script is run outside RStudio. Both cases fall through to the message
# below instead of failing inside setwd(); the working directory then has to
# be set by hand as described above.
script_path <- if (requireNamespace("rstudioapi", quietly = TRUE) &&
                   rstudioapi::isAvailable()) {
  rstudioapi::getActiveDocumentContext()$path
} else {
  ""
}

# Only change directory when a real, non-empty path was obtained
if (!is.null(script_path) && nzchar(script_path)) {
  # The script lives in <replication folder>/code/, so go up two levels
  # from the file path to reach the replication folder
  parent_directory <- dirname(dirname(script_path))

  # Set the working directory to the replication folder
  setwd(parent_directory)
} else {
  cat("Script path is not set. Ensure your script is saved and you are running RStudio.")
}
# Check Working Directory
getwd()

# Load the three input tables; paths are relative to the working directory
# (the replication folder)
posts <- read_csv(file = "data/posts.csv")
comments <- read_csv(file = "data/comments.csv")
metadata <- read_csv(file = "data/metadata.csv")

############
# FIGURE 1 #
############

# Figure 1a: monthly volume of posts, January 2013 through August 2018

# Helper column of ones so that every post adds 1 to its month's total.
# It is kept on `posts` because it is part of the state this script leaves
# behind.
posts$count <- 1

# Bin posts by calendar month (floor_date() maps each date to the first day
# of its month), total them, and keep only the study window.
# NOTE(review): the filter compares `month` against Date values from ymd();
# this assumes read_csv() parsed `date` as a Date rather than a date-time --
# confirm against the data.
ppm <- posts %>%
  group_by(month = floor_date(date, unit = "month")) %>%
  summarise(ppm = sum(count)) %>%
  filter(month >= ymd("2013-01-01") & month <= ymd("2018-08-30"))

# Build the plot as a named object so that it can be handed to ggsave()
# explicitly instead of relying on whichever plot was created last
figure_1a <- ggplot(ppm, aes(x = month, y = ppm)) +
  geom_line(color = "black") +
  labs(y = "Monthly Volume of Posts", x = "Date") +
  theme_minimal(base_size = 22) +
  theme(legend.position = "none")

# Show the plot when the script is run interactively
figure_1a

# Make sure the output folder exists before writing the PDF
if (!dir.exists("plots")) {
  dir.create("plots")
}
ggsave("plots/Figure_1a.pdf", plot = figure_1a, width = 11, height = 7)

# Figure 1b: monthly volume of comments, January 2013 through August 2018

# Helper column of ones so that every comment adds 1 to its month's total.
# It is kept on `comments` because it is part of the state this script
# leaves behind.
comments$count <- 1

# Bin comments by calendar month (floor_date() maps each date to the first
# day of its month), total them, and keep only the study window.
# NOTE(review): the filter compares `month` against Date values from ymd();
# this assumes read_csv() parsed `date` as a Date rather than a date-time --
# confirm against the data.
cpm <- comments %>%
  group_by(month = floor_date(date, unit = "month")) %>%
  summarise(cpm = sum(count)) %>%
  filter(month >= ymd("2013-01-01") & month <= ymd("2018-08-30"))

# Build the plot as a named object so that it can be handed to ggsave()
# explicitly instead of relying on whichever plot was created last
figure_1b <- ggplot(cpm, aes(x = month, y = cpm)) +
  # geom_smooth(method = loess, span = .05) +  # disabled alternative layer
  geom_line(color = "black") +
  labs(y = "Monthly Volume of Comments", x = "Date") +
  theme_minimal(base_size = 22) +
  theme(legend.position = "none")

# Show the plot when the script is run interactively
figure_1b

# Make sure the output folder exists before writing the PDF
if (!dir.exists("plots")) {
  dir.create("plots")
}
ggsave("plots/Figure_1b.pdf", plot = figure_1b, width = 11, height = 7)


############
# TABLE 1 #
############

# Top Countries: number of metadata rows per host country, most frequent
# first, limited to the first 11 entries
country_table <- as.data.frame(table(metadata$page_host_country))
country_table <- country_table[order(-country_table$Freq), ]

# head() keeps at most 11 rows; indexing with 1:11 would pad the table with
# all-NA rows whenever fewer than 11 distinct countries are present
country_table <- head(country_table, 11)

# Emit the LaTeX table without the row-number column
print(xtable(country_table), include.rownames = FALSE)

############
# TABLE A2 #
############

# Page IDs: one row per metadata entry, with the ID stored as text so that
# it is written out verbatim rather than formatted as a number
page_ids <- as.data.frame(as.character(metadata$page_id))

# Print explicitly (as done for Table 1) so the LaTeX table is also emitted
# when the script is run through source(), where top-level values are not
# auto-printed
print(xtable(page_ids))





